#ifndef __CUDA_DRIVER_TYPES_H__
#define __CUDA_DRIVER_TYPES_H__

#include "mc_runtime_types.h"

/*
 * Fixed-width unsigned integer aliases used by the driver types.
 * Non-MSVC toolchains take them from <stdint.h>; MSVC builds use the
 * compiler's built-in sized integer keywords instead.
 */
#if !defined(_MSC_VER)
#include <stdint.h>
typedef uint32_t mcuint32_t;
typedef uint64_t mcuint64_t;
#else
typedef unsigned __int32 mcuint32_t;
typedef unsigned __int64 mcuint64_t;
#endif

/*
 * Driver-API status codes.
 *
 * Developer note: whenever an enumerator here is added, removed or renumbered,
 * update the mcErrorTomcDrvError functions as well.
 *
 * Every enumerator carries an explicit value; the numeric bands below are only
 * a reading aid and follow the gaps already present in the numbering.
 */
typedef enum mcDrvError_enum {
    /* 0-8: success, generic and profiler codes */
    MC_SUCCESS                        = 0,
    MC_ERROR_INVALID_VALUE            = 1,
    MC_ERROR_OUT_OF_MEMORY            = 2,
    MC_ERROR_NOT_INITIALIZED          = 3,
    MC_ERROR_DEINITIALIZED            = 4,
    MC_ERROR_PROFILER_DISABLED        = 5,
    MC_ERROR_PROFILER_NOT_INITIALIZED = 6,
    MC_ERROR_PROFILER_ALREADY_STARTED = 7,
    MC_ERROR_PROFILER_ALREADY_STOPPED = 8,

    /* 34 */
    MC_ERROR_STUB_LIBRARY = 34,

    /* 100-102: device codes */
    MC_ERROR_NO_DEVICE           = 100,
    MC_ERROR_INVALID_DEVICE      = 101,
    MC_ERROR_DEVICE_NOT_LICENSED = 102,

    /* 200-224: image, context, mapping and JIT codes (203 and 204 are unassigned) */
    MC_ERROR_INVALID_IMAGE              = 200,
    MC_ERROR_INVALID_CONTEXT            = 201,
    MC_ERROR_CONTEXT_ALREADY_CURRENT    = 202,
    MC_ERROR_MAP_FAILED                 = 205,
    MC_ERROR_UNMAP_FAILED               = 206,
    MC_ERROR_ARRAY_IS_MAPPED            = 207,
    MC_ERROR_ALREADY_MAPPED             = 208,
    MC_ERROR_NO_BINARY_FOR_GPU          = 209,
    MC_ERROR_ALREADY_ACQUIRED           = 210,
    MC_ERROR_NOT_MAPPED                 = 211,
    MC_ERROR_NOT_MAPPED_AS_ARRAY        = 212,
    MC_ERROR_NOT_MAPPED_AS_POINTER      = 213,
    MC_ERROR_ECC_UNCORRECTABLE          = 214,
    MC_ERROR_UNSUPPORTED_LIMIT          = 215,
    MC_ERROR_CONTEXT_ALREADY_IN_USE     = 216,
    MC_ERROR_PEER_ACCESS_UNSUPPORTED    = 217,
    MC_ERROR_INVALID_KERNEL_FILE        = 218,
    MC_ERROR_INVALID_GRAPHICS_CONTEXT   = 219,
    MC_ERROR_MXLINK_UNCORRECTABLE       = 220,
    MC_ERROR_JIT_COMPILER_NOT_FOUND     = 221,
    MC_ERROR_UNSUPPORTED_KERNEL_VERSION = 222,
    MC_ERROR_JIT_COMPILATION_DISABLED   = 223,
    MC_ERROR_UNSUPPORTED_EXEC_AFFINITY  = 224,

    /* 300-304: source, file, shared-object and operating-system codes */
    MC_ERROR_INVALID_SOURCE                 = 300,
    MC_ERROR_FILE_NOT_FOUND                 = 301,
    MC_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
    MC_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
    MC_ERROR_OPERATING_SYSTEM               = 304,

    /* 400-401: handle and state codes */
    MC_ERROR_INVALID_HANDLE = 400,
    MC_ERROR_ILLEGAL_STATE  = 401,

    /* 500 / 600 */
    MC_ERROR_NOT_FOUND = 500,
    MC_ERROR_NOT_READY = 600,

    /* 700-720: launch and execution codes (706 and 707 are unassigned) */
    MC_ERROR_ILLEGAL_ADDRESS                = 700,
    MC_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
    MC_ERROR_LAUNCH_TIMEOUT                 = 702,
    MC_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
    MC_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,
    MC_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,
    MC_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,
    MC_ERROR_CONTEXT_IS_DESTROYED           = 709,
    MC_ERROR_ASSERT                         = 710,
    MC_ERROR_TOO_MANY_PEERS                 = 711,
    MC_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
    MC_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713,
    MC_ERROR_HARDWARE_STACK_ERROR           = 714,
    MC_ERROR_ILLEGAL_INSTRUCTION            = 715,
    MC_ERROR_MISALIGNED_ADDRESS             = 716,
    MC_ERROR_INVALID_ADDRESS_SPACE          = 717,
    MC_ERROR_INVALID_PC                     = 718,
    MC_ERROR_LAUNCH_FAILED                  = 719,
    MC_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720,

    /* 800-809: permission, support, system and MPS codes */
    MC_ERROR_NOT_PERMITTED                  = 800,
    MC_ERROR_NOT_SUPPORTED                  = 801,
    MC_ERROR_SYSTEM_NOT_READY               = 802,
    MC_ERROR_SYSTEM_DRIVER_MISMATCH         = 803,
    MC_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
    MC_ERROR_MPS_CONNECTION_FAILED          = 805,
    MC_ERROR_MPS_RPC_FAILURE                = 806,
    MC_ERROR_MPS_SERVER_NOT_READY           = 807,
    MC_ERROR_MPS_MAX_CLIENTS_REACHED        = 808,
    MC_ERROR_MPS_MAX_CONNECTIONS_REACHED    = 809,

    /* 900-911: stream-capture, timeout, graph and external-device codes */
    MC_ERROR_STREAM_CAPTURE_UNSUPPORTED  = 900,
    MC_ERROR_STREAM_CAPTURE_INVALIDATED  = 901,
    MC_ERROR_STREAM_CAPTURE_MERGE        = 902,
    MC_ERROR_STREAM_CAPTURE_UNMATCHED    = 903,
    MC_ERROR_STREAM_CAPTURE_UNJOINED     = 904,
    MC_ERROR_STREAM_CAPTURE_ISOLATION    = 905,
    MC_ERROR_STREAM_CAPTURE_IMPLICIT     = 906,
    MC_ERROR_CAPTURED_EVENT              = 907,
    MC_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908,
    MC_ERROR_TIMEOUT                     = 909,
    MC_ERROR_GRAPH_EXEC_UPDATE_FAILURE   = 910,
    MC_ERROR_EXTERNAL_DEVICE             = 911,

    /* 999 */
    MC_ERROR_UNKNOWN = 999
} mcDrvError_t;

/*
 * CAUTION:
 * In the CUDA driver API a device pointer is defined as an address (unsigned long long), but in
 * the MACA runtime implementation it is defined as void *, hence an explicit type conversion is
 * NECESSARY!!
 *
 * For arguments with type mcDrvDeviceptr_t *, please convert it to a local variable and assign it
 * back after calling runtime implementation.
 * EXAMPLE: dptr is mcDrvDeviceptr_t *
 *
 *      auto temp_ptr = reinterpret_cast<mcDeviceptr_t>(static_cast<uintptr_t>(*dptr));
 *      auto ret      = mcErrorTomcDrvError(mcMemAllocAsyncImpl(&temp_ptr, bytesize, hStream));
 *      *dptr         = reinterpret_cast<uintptr_t>(temp_ptr);
 *      MC_DRV_RETURN(ret);
 *
 * For arguments with type mcDrvDeviceptr_t, convert them before passing them to the runtime
 * implementation.
 * EXAMPLE: addr is mcDrvDeviceptr_t
 *
 *      MC_DRV_RETURN(mcErrorTomcDrvError(mcStreamWriteValue32Impl(
 *          stream, reinterpret_cast<mcDeviceptr_t>(static_cast<uintptr_t>(addr)), value, flags)))
 *
 */
/*
 * Driver-side device address, held as an integer (unsigned long long); its
 * runtime counterpart is mcDeviceptr_t. A plain typedef names the same type in
 * both C and C++, so no language-specific alias syntax is needed.
 */
typedef unsigned long long mcDrvDeviceptr_t;

/*
 * Device-address type declared as unsigned int.
 * NOTE(review): the "v1" suffix presumably marks a legacy API revision - confirm.
 */
typedef unsigned int mcDrvDeviceptrv1_t;
/* Driver-API name for mcUuid_t (presumably declared in mc_runtime_types.h - confirm). */
typedef mcUuid_t mcDrvUuid_t;
/* Driver-API name for mcDevice_t (presumably declared in mc_runtime_types.h - confirm). */
typedef mcDevice_t mcDrvDevice_t;

/* Flag bits with the MC_COOPERATIVE_LAUNCH_MULTI_DEVICE_ prefix. */
#define MC_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC  0x01
#define MC_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC 0x02

/*
 * Flag bits with the MC_ARRAY3D_ prefix.
 * MC_ARRAY3D_LAYERED and MC_ARRAY3D_2DARRAY intentionally share the value 0x01.
 */
#define MC_ARRAY3D_LAYERED                                     0x01
#define MC_ARRAY3D_2DARRAY                                     0x01
#define MC_ARRAY3D_SURFACE_LDST                                0x02
#define MC_ARRAY3D_CUBEMAP                                     0x04
#define MC_ARRAY3D_TEXTURE_GATHER                              0x08
#define MC_ARRAY3D_DEPTH_TEXTURE                               0x10
#define MC_ARRAY3D_COLOR_ATTACHMENT                            0x20
#define MC_ARRAY3D_SPARSE                                      0x40
#define MC_ARRAY3D_DEFERRED_MAPPING                            0x80

/* Flag bits with the MC_TRSA_ / MC_TRSF_ prefixes. */
#define MC_TRSA_OVERRIDE_FORMAT                0x01
#define MC_TRSF_READ_AS_INTEGER                0x01
#define MC_TRSF_NORMALIZED_COORDINATES         0x02
#define MC_TRSF_SRGB                           0x10
#define MC_TRSF_DISABLE_TRILINEAR_OPTIMIZATION 0x20
#define MC_TRSF_SEAMLESS_CUBEMAP               0x40
/*
 * Parenthesized so the negative literal always expands as one primary
 * expression: a bare -1 can silently merge with a preceding operand
 * (`x MC_PARAM_TR_DEFAULT` would compile as `x - 1`) or bind to a following
 * postfix operator.
 */
#define MC_PARAM_TR_DEFAULT                    (-1)

/* Sentinel device handles: -1 and -2 converted to mcDrvDevice_t. */
#define MC_DEVICE_CPU     ((mcDrvDevice_t)-1)
#define MC_DEVICE_INVALID ((mcDrvDevice_t)-2)

// Device attribute identifiers for the driver API. Only the first enumerator has an explicit
// value (1); every later one takes the next integer, so the declaration order IS the numbering.
// Do not insert, remove or reorder entries without updating the runtime enum as well.
// NOTICE: mcDrvDeviceAttribute_t must agree with mcDeviceAttribute_t
typedef enum mcDrvDeviceAttribute_enum {
    MC_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,   //< Maximum number of threads per block.
    MC_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,             //< Maximum x-dimension of a block.
    MC_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,             //< Maximum y-dimension of a block.
    MC_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,             //< Maximum z-dimension of a block.
    MC_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,              //< Maximum x-dimension of a grid.
    MC_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,              //< Maximum y-dimension of a grid.
    MC_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,              //< Maximum z-dimension of a grid.
    MC_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, //< Maximum shared memory available per block
                                                     // in bytes.
    MC_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,       //< Constant memory size in bytes.
    MC_DEVICE_ATTRIBUTE_WARP_SIZE,                   //< Warp size in threads.
    MC_DEVICE_ATTRIBUTE_MAX_PITCH,               //< Maximum pitch in bytes allowed by memory copies
    MC_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, //< Maximum number of 32-bit registers available
                                                 // to a thread block. This number is shared by
                                                 // all thread blocks simultaneously resident on a
                                                 // multiprocessor.
    MC_DEVICE_ATTRIBUTE_CLOCK_RATE,              //< Peak clock frequency in kilohertz.
    MC_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT,       //< Alignment requirement for textures
    MC_DEVICE_ATTRIBUTE_GPU_OVERLAP, //< Device can possibly copy memory and execute a kernel
                                     // concurrently. Deprecated. Use instead
                                     // MC_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT.
    MC_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,  //< Number of multiprocessors on the device.
    MC_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,   //< Run time limit for kernels executed on the device
    MC_DEVICE_ATTRIBUTE_INTEGRATED,            //< iGPU
    MC_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY,   //< Device can map host memory into device address
                                               // space
    MC_DEVICE_ATTRIBUTE_COMPUTE_MODE,          //< Compute mode that device is currently in.
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_WIDTH,  //< Maximum number of elements in 1D images
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_WIDTH,  // Maximum dimension width of 2D images in image
                                               // elements
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_HEIGHT, // Maximum dimension height of 2D images in image
                                               // elements
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH,  //< Maximum dimension width of 3D images in image
                                               // elements
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT, //< Maximum dimension height of 3D images in image
                                               // elements
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH,  //< Maximum dimension depth of 3D images in image
                                               // elements
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_WIDTH,
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_HEIGHT,
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_LAYERS,
    MC_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT,
    MC_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, //< Device can possibly execute multiple kernels
                                            // concurrently.
    MC_DEVICE_ATTRIBUTE_ECC_ENABLED,        //< Device has ECC support enabled
    MC_DEVICE_ATTRIBUTE_PCI_BUS_ID,         //< PCI Bus ID.
    MC_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,      //< PCI Device ID.
    MC_DEVICE_ATTRIBUTE_TCC_DRIVER,
    MC_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,  //< Peak memory clock frequency in kilohertz.
    MC_DEVICE_ATTRIBUTE_MEMORY_BUS_WIDTH,   //< Global memory bus width in bits.
    MC_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, //< Size of L2 cache in bytes. 0 if the device doesn't have
                                       // L2 cache.
    MC_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, //< Maximum resident threads per
                                                        // multiprocessor.
    MC_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT,             // Number of asynchronous engines.
    MC_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, //< Device shares a unified address space with the host
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_WIDTH,
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_LAYERS,
    MC_DEVICE_ATTRIBUTE_RESERVERED_44, // NOTE(review): "RESERVERED" spelling is part of the
                                       // public name; renaming would break existing users
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_WIDTH,  // Not implemented
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_HEIGHT, // Not implemented
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH_ALT,     // Not implemented
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT_ALT,    // Not implemented
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH_ALT,     // Not implemented
    MC_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID,
    MC_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT,      // Pitch alignment requirement for 2D texture
                                                      // references bound to pitched memory;
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_WIDTH,    // Not implemented
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_WIDTH,  // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_LAYERS, // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_WIDTH,               // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_WIDTH,               // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_HEIGHT,              // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_WIDTH,               // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_HEIGHT,              // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_DEPTH,               // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_WIDTH,       // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_LAYERS,      // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_WIDTH,       // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_HEIGHT,      // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_LAYERS,      // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_WIDTH,          // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_WIDTH,  // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_LAYERS, // not implemented
    MC_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH,     // Maximum 1D linear texture width
    MC_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH,     // Maximum 2D linear texture width
    MC_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT,    // Maximum 2D linear texture height
    MC_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH,  // Maximum 2D linear texture pitch in bytes
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_WIDTH,  // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_HEIGHT, // not implemented
    MC_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,        // Major compute capability version number.
    MC_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,        // Minor compute capability version number.
    MC_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_MIPMAPPED_WIDTH,  // not implemented
    MC_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED,     // not implemented
    MC_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED,       // not implemented
    MC_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED,        // not implemented
    MC_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, // Maximum Shared Memory Per
                                                              // Multiprocessor.
    MC_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,     // Maximum number of 32-bit registers
                                                              // available to a multiprocessor
    MC_DEVICE_ATTRIBUTE_MANAGED_MEMORY,  // Device supports allocating managed memory on this system
    MC_DEVICE_ATTRIBUTE_IS_MULTI_GPU_BOARD,       // Multiple GPU devices
    MC_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, // not implemented. Unique identifier for a group
                                                  // of devices on the same multi-GPU board
    MC_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED,
    MC_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, // not implemented. Ratio of single
                                                               // precision performance (in
                                                               // floating-point operations per
                                                               // second) to double precision
                                                               // performance
    MC_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS,    //< Device supports coherently accessing pageable
                                                   // memory without calling mcHostRegister on it

    MC_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, //< Device can coherently access managed memory
                                                   // concurrently with the CPU
    MC_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED,
    MC_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERD_MEM, // not implemented. 1 if the device
                                                                // can access host registered memory
                                                                // at the same virtual address as
                                                                // the CPU, and 0 otherwise
    MC_DEVICE_ATTRIBUTE_RESERVED_92,
    MC_DEVICE_ATTRIBUTE_RESERVED_93,
    MC_DEVICE_ATTRIBUTE_RESERVED_94,

    MC_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH,              //< Support cooperative launch
    MC_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH, //< Support cooperative launch on multiple
                                                         // devices
    MC_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTION, // Maximum per block shared
                                                            // memory size on the device. This value
                                                            // can be opted into when using
                                                            // mcFuncSetAttribute
    MC_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES, //< The MC_STREAM_WAIT_VALUE_FLUSH flag and the
                                                 // MC_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are
                                                 // supported on the device
    MC_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED, // not implemented
    MC_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, //< Device accesses pageable
                                                                      // memory via the host's page
                                                                      // tables
    MC_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST, //< Host can directly access managed
                                                             // memory on the device without
                                                             // migration
    MC_DEVICE_ATTRIBUTE_RESERVED_102,
    MC_DEVICE_ATTRIBUTE_RESERVED_103,
    MC_DEVICE_ATTRIBUTE_RESERVED_104,
    MC_DEVICE_ATTRIBUTE_RESERVED_105,
    MC_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR,    // Maximum number of blocks per
                                                          // multiprocessor
    MC_DEVICE_ATTRIBUTE_RESERVED_107,
    MC_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE,     // Device's maximum l2 persisting setting
                                                          // bytes.
    MC_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE,    //< The max value of mcAccessPolicyWindow
                                                          // bytes.
    MC_DEVICE_ATTRIBUTE_RESERVED_110,
    MC_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, // not implemented. Shared memory reserved
                                                          // by maca driver per block in bytes
    MC_DEVICE_ATTRIBUTE_SPARSE_MACA_ARRAY_SUPPORTED,      // not implemented. 1 if the device supports
                                                     // sparse maca arrays and sparse maca mipmapped
                                                     // arrays.
    MC_DEVICE_ATTRIBUTE_HOST_REGISTER_READ_ONLY_SUPPORTED, // not implemented. Device supports using
                                                           // the mcHostRegister flag
                                                           //  mcHostRegisterReadOnly to register
                                                           //  memory that must be mapped as
                                                           //  read-only to the GPU
    MC_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED,
    MC_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, //< Device supports using the ::mcMemAllocAsync and
                                                //::mcMemPool family of APIs
    MC_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, // not implemented. 1 if the device supports
                                                   // GPUDirect RDMA APIs, and 0 otherwise
    MC_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS,  // not implemented. Bitmask to be
                                                               // interpreted according to the
                                                               // mcFlushGPUDirectRDMAWritesOptions
                                                               // enum
    MC_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING,       // not implemented
    MC_DEVICE_ATTRIBUTE_MEMORY_POOL_SUPPORTED_HANDLE_TYPES,    //< Bitmask of handle types supported
                                                               // with mempool based IPC
    MC_DEVICE_ATTRIBUTE_CLUSTERLAUNCH,
    MC_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_MACA_ARRAY_SUPPORTED, // not implemented. 1 if the device
                                                               // supports deferred mapping maca
                                                               // arrays and maca mipmapped arrays.
    MC_DEVICE_ATTRIBUTE_RESERVED_122,
    MC_DEVICE_ATTRIBUTE_RESERVED_123,
    MC_DEVICE_ATTRIBUTE_RESERVED_124,
    MC_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORT, // not implemented. 1 if the device supports IPC Events
    MC_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT,
    MC_DEVICE_ATTRIBUTE_RESERVED_127,
    MC_DEVICE_ATTRIBUTE_RESERVED_128,
    MC_DEVICE_ATTRIBUTE_RESERVED_129,
    MC_DEVICE_ATTRIBUTE_NUMA_CONFIG,
    MC_DEVICE_ATTRIBUTE_NUMA_ID,
    MC_DEVICE_ATTRIBUTE_RESERVED_132,
    MC_DEVICE_ATTRIBUTE_MPS_ENABLED,
    MC_DEVICE_ATTRIBUTE_HOST_MUMA_ID, // NOTE(review): likely meant HOST_NUMA_ID (cf. NUMA_CONFIG /
                                      // NUMA_ID above) - confirm before adding a corrected alias
    MC_DEVICE_ATTRIBUTE_WAVE_SIZE,          // Wave size in threads.
    MC_DEVICE_ATTRIBUTE_HDP_MEM_FLUSH_CNTL, //< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
    MC_DEVICE_ATTRIBUTE_HDP_REG_FLUSH_CNTL, //< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register

    MC_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_UNMATCHED_FUNC, //< Supports cooperative launch on
                                                                 // multiple devices with unmatched
                                                                 // functions
    MC_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_UNMATCHED_GRID_DIM, //< Supports cooperative launch
                                                                     // on multiple devices with
                                                                     // unmatched grid dimensions
    MC_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_UNMATCHED_BLOCK_DIM,  //< Supports cooperative
                                                                       // launch on multiple devices
                                                                       // with unmatched block
                                                                       // dimensions
    MC_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_UNMATCHED_SHARED_MEM, //< Supports cooperative
                                                                       // launch on multiple devices
                                                                       // with unmatched shared
                                                                       // memories
    MC_DEVICE_ATTRIBUTE_ASIC_REVISION,                       //< Revision of the GPU in this device

    MC_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, //< Device supports virtual memory
                                                             // management APIs like
                                                             // mcMemAddressReserve, mcMemCreate,
                                                             // mcMemMap and related APIs
    MC_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED, //< Device supports exporting
                                                                     // memory to a posix file
                                                                     // descriptor with
                                                                     // mcMemExportToShareableHandle,
                                                                     // if requested via mcMemCreate
    MC_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED, //< Device supports exporting memory to
                                                            // a Win32 NT handle with
                                                            //::mcMemExportToShareableHandle, if
                                                            // requested via mcMemCreate
    MC_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED,      //< Device supports compressible memory
                                                            // allocation via mcMemCreate
    MC_DEVICE_ATTRIBUTE_CAN_USES_STREAM_WAIT_VALUE, // '1' if Device supports mcStreamWaitValue32()
                                                    // and mcStreamWaitValue64() , '0' otherwise.
    MC_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS,     // mcStreamBatchMemOp and related APIs are
                                                    // supported.
    MC_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR, // MC_STREAM_WAIT_VALUE_NOR is supported.

    MC_DEVICE_ATTRIBUTE_UNKNOW,
    MC_DEVICE_ATTRIBUTE_MAX // Last enumerator; one past MC_DEVICE_ATTRIBUTE_UNKNOW.
} mcDrvDeviceAttribute_t;

/**
 * Legacy device properties.
 *
 * Plain record of int fields; field order and types define the layout and
 * must not change.
 */
typedef struct mcDrvDevprop_st {
    /** Maximum number of threads per block */
    int maxThreadsPerBlock;
    /** Maximum size of each dimension of a block */
    int maxThreadsDim[3];
    /** Maximum size of each dimension of a grid */
    int maxGridSize[3];
    /** Shared memory available per block in bytes */
    int sharedMemPerBlock;
    /** Constant memory available on device in bytes */
    int totalConstantMemory;
    /** Warp size in threads */
    int PEUWidth;
    /** Maximum pitch in bytes allowed by memory copies */
    int memPitch;
    /** 32-bit registers available per block */
    int regsPerBlock;
    /** Clock frequency in kilohertz */
    int clockRate;
    /** Alignment requirement for textures */
    int textureAlign;
} mcDrvDevprop;

/* Memory-advise hints; the SET and UNSET enumerators come in pairs. */
typedef enum mcDrvMemoryAdvise_enum {
    /* Data will mostly be read and only occasionally be written to */
    MC_MEM_ADVISE_SET_READ_MOSTLY = 1,
    /* Undo the effect of MC_MEM_ADVISE_SET_READ_MOSTLY */
    MC_MEM_ADVISE_UNSET_READ_MOSTLY = 2,
    /* Set the preferred location for the data as the specified device */
    MC_MEM_ADVISE_SET_PREFERRED_LOCATION = 3,
    /* Clear the preferred location for the data */
    MC_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4,
    /* Data will be accessed by the specified device, so prevent page faults as much as possible */
    MC_MEM_ADVISE_SET_ACCESSED_BY = 5,
    /* Let the Unified Memory subsystem decide on the page faulting policy for the specified device */
    MC_MEM_ADVISE_UNSET_ACCESSED_BY = 6
} mcDrvMemoryAdvise;

/* Attributes that can be queried for a pointer; values run contiguously from 1 to 17. */
typedef enum mcDrvPointerAttributeType_enum {
    /* The context on which a pointer was allocated or registered */
    MC_POINTER_ATTRIBUTE_CONTEXT = 1,
    /* The memory type describing the physical location of a pointer */
    MC_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
    /* The address at which a pointer's memory may be accessed on the device */
    MC_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
    /* The address at which a pointer's memory may be accessed on the host */
    MC_POINTER_ATTRIBUTE_HOST_POINTER = 4,
    /* A pair of tokens for use with the nv-p2p.h Linux kernel interface */
    MC_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
    /* Synchronize every synchronous memory operation initiated on this region */
    MC_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
    /* A process-wide unique ID for an allocated memory region */
    MC_POINTER_ATTRIBUTE_BUFFER_ID = 7,
    /* Indicates if the pointer points to managed memory */
    MC_POINTER_ATTRIBUTE_IS_MANAGED = 8,
    /* A device ordinal of a device on which a pointer was allocated or registered */
    MC_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
    /* 1 if this pointer maps to an allocation that is suitable for the legacy IPC
     * memory-handle call (::cudaIpcGetMemHandle in CUDA terms), 0 otherwise */
    MC_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10,
    /* Starting address for this requested pointer */
    MC_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11,
    /* Size of the address range for this requested pointer */
    MC_POINTER_ATTRIBUTE_RANGE_SIZE = 12,
    /* 1 if this pointer is in a valid address range that is mapped to a backing
     * allocation, 0 otherwise */
    MC_POINTER_ATTRIBUTE_MAPPED = 13,
    /* Bitmask of allowed memory-allocation handle types for this allocation */
    MC_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14,
    /* 1 if the memory this pointer is referencing can be used with the GPUDirect RDMA API */
    MC_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15,
    /* Returns the access flags the device associated with the current context has on the
     * corresponding memory referenced by the pointer given */
    MC_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16,
    /* Returns the mempool handle for the allocation if it was allocated from a mempool.
     * Otherwise returns NULL. */
    MC_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17
} mcDrvPointerAttributeType;

/*
 * Kinds of input accepted by the JIT; numbered contiguously from 0.
 * MC_JIT_NUM_INPUT_TYPES is left implicit so it always equals the number of
 * input kinds listed before it.
 */
typedef enum mcDrvJitInputType_enum {
    MC_JIT_INPUT_BITCODE       = 0,
    MC_JIT_INPUT_DEVICE_OBJECT = 1,
    MC_JIT_INPUT_FATBINARY     = 2,
    MC_JIT_INPUT_OBJECT        = 3,
    MC_JIT_INPUT_LIBRARY       = 4,
    MC_JIT_NUM_INPUT_TYPES
} mcDrvJitInputType_t;

/*
 * JIT option identifiers; numbered contiguously from 0.
 * MC_JIT_NUM_OPTIONS is left implicit so it always equals the number of
 * options listed before it.
 */
typedef enum mcDrvJitOption_enum {
    MC_JIT_MAX_REGISTERS                    = 0,
    MC_JIT_THREADS_PER_BLOCK                = 1,
    MC_JIT_WALL_TIME                        = 2,
    MC_JIT_INFO_LOG_BUFFER                  = 3,
    MC_JIT_INFO_LOG_BUFFER_SIZE_BYTES       = 4,
    MC_JIT_ERROR_LOG_BUFFER                 = 5,
    MC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES      = 6,
    MC_JIT_OPTIMIZATION_LEVEL               = 7,
    MC_JIT_TARGET_FROM_CUCONTEXT            = 8,
    MC_JIT_TARGET                           = 9,
    MC_JIT_FALLBACK_STRATEGY                = 10,
    MC_JIT_GENERATE_DEBUG_INFO              = 11,
    MC_JIT_LOG_VERBOSE                      = 12,
    MC_JIT_GENERATE_LINE_INFO               = 13,
    MC_JIT_CACHE_MODE                       = 14,
    MC_JIT_NEW_SM3X_OPT                     = 15,
    MC_JIT_FAST_COMPILE                     = 16,
    MC_JIT_GLOBAL_SYMBOL_NAMES              = 17,
    MC_JIT_GLOBAL_SYMBOL_ADDRESSES          = 18,
    MC_JIT_GLOBAL_SYMBOL_COUNT              = 19,
    MC_JIT_LTO                              = 20,
    MC_JIT_FTZ                              = 21,
    MC_JIT_PREC_DIV                         = 22,
    MC_JIT_PREC_SQRT                        = 23,
    MC_JIT_FMA                              = 24,
    MC_JIT_REFERENCED_KERNEL_NAMES          = 25,
    MC_JIT_REFERENCED_KERNEL_COUNT          = 26,
    MC_JIT_REFERENCED_VARIABLE_NAMES        = 27,
    MC_JIT_REFERENCED_VARIABLE_COUNT        = 28,
    MC_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES = 29,
    MC_JIT_NUM_OPTIONS
} mcDrvjit_option_t;

/**
 * Online compilation targets.
 * Each enumerator's value equals the number in its name.
 */
typedef enum mcDrvJitTarget_enum {
    /* for NV_ARCH_V100 */
    WNV_TARGET_SM_70 = 70,
    /* for NV_ARCH_T4 */
    WNV_TARGET_SM_75 = 75,
    /* for NV_ARCH_A100 */
    WNV_TARGET_SM_80 = 80
} mcDrvJitTarget;

/**
 * Cubin matching fallback strategies
 */
typedef enum mcDrvJitFallback_enum {
    /* Prefer to compile ptx if exact binary match not found */
    MC_PREFER_PTX = 0,
    /* Prefer to fall back to compatible binary code if exact match not found */
    MC_PREFER_BINARY = 1
} mcDrvJitFallback;

/**
 * Caching modes for dlcm
 */
typedef enum mcDrvJitCacheMode_enum {
    /** Compile with no -dlcm flag specified */
    MC_JIT_CACHE_OPTION_NONE = 0,
    /** Compile with L1 cache disabled */
    MC_JIT_CACHE_OPTION_CG = 1,
    /** Compile with L1 cache enabled */
    MC_JIT_CACHE_OPTION_CA = 2
} mcDrvJitCacheMode;

/*
 * Device compute modes.
 * The numbering is not contiguous: no enumerator in this enum has the value 1.
 */
typedef enum mcDrvComputeMode_enum {
    MC_COMPUTEMODE_DEFAULT = 0,
    MC_COMPUTEMODE_PROHIBITED = 2,
    MC_COMPUTEMODE_EXCLUSIVE_PROCESS = 3
} mcDrvComputeMode;

/* Attributes describing the link between two devices; values are sequential IDs 1-4, not bit flags. */
typedef enum mcDrvDeviceP2PAttribute_enum {
    /** A relative value indicating the performance of the link between two devices */
    MC_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 1,
    /** P2P access is enabled */
    MC_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 2,
    /** Atomic operation over the link supported */
    MC_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 3,
    /** Accessing CUDA arrays over the link supported */
    MC_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 4
} mcDrvDeviceP2PAttribute;

/*
 * GPUDirect RDMA flush writes targets
 */
typedef enum mcDrvFlushGPUDirectRDMAWritesTarget_enum {
    /** Sets the target for ::mcFlushGPUDirectRDMAWrites() to the currently
     *  active MC device context. */
    MC_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0
} mcDrvFlushGPUDirectRDMAWritesTarget;

/**
 * GPUDirect RDMA flush writes scopes
 */
typedef enum mcDrvFlushGPUDirectRDMAWritesScope_enum {
    /** Blocks until remote writes are visible to the MC device context owning the data. */
    MC_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100,
    /** Blocks until remote writes are visible to all MC device contexts. */
    MC_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200
} mcDrvFlushGPUDirectRDMAWritesScope;

/**
 * Flags to specify search options. For more details see ::wcuGetProcAddress
 */
typedef enum mcDrvDriverProcAddressFlags_enum {
    /* Default search mode for driver symbols. */
    MC_GET_PROC_ADDRESS_DEFAULT = 0x0,
    /* Search for legacy versions of driver symbols. (bit 0) */
    MC_GET_PROC_ADDRESS_LEGACY_STREAM = 0x1,
    /* Search for per-thread versions of driver symbols. (bit 1) */
    MC_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 0x2
} mcDrvDriverProcAddressFlags;

/*
 * Execution-affinity kinds.
 * MC_EXEC_AFFINITY_TYPE_MAX is left implicit so it always follows the last
 * real kind.
 */
typedef enum mcDrvexecAffinityType_enum {
    MC_EXEC_AFFINITY_TYPE_SM_COUNT = 0x0,
    MC_EXEC_AFFINITY_TYPE_MAX
} mcDrvexecAffinityType;

/* Payload for the SM-count execution affinity: a single unsigned count. */
typedef struct mcDrvexecAffinitySmCount {
    unsigned val;
} mcDrvexecAffinitySmCount_t;

/**
 * Execution Affinity Parameters.
 *
 * Tagged record: `type` names the affinity kind and `param` carries the
 * matching payload (only the SM-count payload is declared here).
 */
typedef struct mcDrvexecAffinityParam {
    /* Affinity kind this record describes */
    mcDrvexecAffinityType type;
    /* Payload for that kind */
    union {
        mcDrvexecAffinitySmCount_t smCount;
    } param;
} mcDrvexecAffinityParam_t;

/* Function cache preferences; values are sequential IDs 0-3. */
typedef enum mcDrvfuncCache_enum {
    MC_FUNC_CACHE_PREFER_NONE = 0,
    MC_FUNC_CACHE_PREFER_SHARED = 1,
    MC_FUNC_CACHE_PREFER_L1 = 2,
    MC_FUNC_CACHE_PREFER_EQUAL = 3,
} mcDrvfunc_cache_t;

/*
 * Limit identifiers; numbered contiguously from 0.
 * MC_LIMIT_MAX is left implicit so it always follows the last real limit.
 */
typedef enum mcDrvlimit_enum {
    MC_LIMIT_STACK_SIZE = 0,
    MC_LIMIT_PRINTF_FIFO_SIZE = 1,
    MC_LIMIT_MALLOC_HEAP_SIZE = 2,
    MC_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 3,
    MC_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4,
    MC_LIMIT_MAX_L2_FETCH_GRANULARITY = 5,
    MC_LIMIT_PERSISTING_L2_CACHE_SIZE = 6,
    MC_LIMIT_MAX
} mcDrvlimit_t;

/* Shared-memory bank size configurations. */
typedef enum mcDrvsharedconfig_enum {
    /** set default shared memory bank size */
    MC_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0,
    /** set shared memory bank width to four bytes */
    MC_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 1,
    /** set shared memory bank width to eight bytes */
    MC_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 2
} mcDrvsharedconfig_t;

// NOTE: mcDrvMemoryAttributeType_t must agree with mcMemRangeAttribute_t.
// Values are spelled out explicitly (1..4) so the numbering is visible at a glance.
typedef enum mcDrvMemoryAttributeType_enum {
    MC_MEM_RANGE_ATTRIBUTE_READ_MOSTLY            = 1,
    MC_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION     = 2,
    MC_MEM_RANGE_ATTRIBUTE_ACCESSED_BY            = 3,
    MC_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4
} mcDrvMemoryAttributeType_t;

/* Context creation flags: scheduling mode in the low three bits plus option bits. */
typedef enum mcDrvCtxFlags_enum {
    /** Automatic scheduling */
    MC_CTX_SCHED_AUTO = 0x00,
    /** Set spin as default scheduling */
    MC_CTX_SCHED_SPIN = 0x01,
    /** Set yield as default scheduling */
    MC_CTX_SCHED_YIELD = 0x02,
    /** Set blocking synchronization as default scheduling */
    MC_CTX_SCHED_BLOCKING_SYNC = 0x04,
    /** Set blocking synchronization as default scheduling
     *  \deprecated This flag was deprecated as of CUDA 4.0
     *  and was replaced with ::MC_CTX_SCHED_BLOCKING_SYNC (same value). */
    MC_CTX_BLOCKING_SYNC = MC_CTX_SCHED_BLOCKING_SYNC,
    /** Union of the scheduling bits (0x07) */
    MC_CTX_SCHED_MASK = MC_CTX_SCHED_SPIN | MC_CTX_SCHED_YIELD | MC_CTX_SCHED_BLOCKING_SYNC,
    /** \deprecated This flag was deprecated as of CUDA 11.0
     *  and it no longer has any effect. All contexts
     *  as of CUDA 3.2 behave as though the flag is enabled. */
    MC_CTX_MAP_HOST = 0x08,
    /** Keep local memory allocation after launch */
    MC_CTX_LMEM_RESIZE_TO_MAX = 0x10,
    /** Union of every flag bit defined above (0x1f) */
    MC_CTX_FLAGS_MASK = MC_CTX_SCHED_MASK | MC_CTX_MAP_HOST | MC_CTX_LMEM_RESIZE_TO_MAX
} mcDrvCtxFlags;

/**
 * Stream creation flags; each value mirrors the identically named
 * mcStream* constant.
 */
typedef enum mcDrvStreamFlags_enum {
    /* Default stream flag */
    MC_STREAM_DEFAULT = mcStreamDefault,
    /* Stream does not synchronize with stream 0 (the NULL stream) */
    MC_STREAM_NON_BLOCKING = mcStreamNonBlocking
} mcDrvStreamFlags;

/**
 * Event creation flags (bit flags)
 */
typedef enum mcDrvEventFlagsEnum {
    /* Default flags */
    MC_EVENT_DEFAULT = 0,
    /* Event uses blocking synchronization */
    MC_EVENT_BLOCKING_SYNC = 1 << 0,
    /* Event will not record timing data */
    MC_EVENT_DISABLE_TIMING = 1 << 1,
    /** Event is suitable for interprocess use.
     * MC_EVENT_DISABLE_TIMING must be set. */
    MC_EVENT_INTERPROCESS = 1 << 2
} mcDrvEventFlags;

/**
 * Event record flags
 */
typedef enum mcDrvEventRecordFlagsEnum {
    /** Default event record flag */
    MC_EVENT_RECORD_DEFAULT = 0,
    /** When using stream capture, create an event record node instead of the
     * default behavior. This flag is invalid when used outside of capture. */
    MC_EVENT_RECORD_EXTERNAL = 1
} mcDrvEventRecordFlags;

/**
 * Event wait flags
 */
typedef enum mcDrvEventWaitFlagsEnum {
    /** Default event wait flag */
    MC_EVENT_WAIT_DEFAULT = 0,
    /** When using stream capture, create an event wait node instead of the
     * default behavior. This flag is invalid when used outside of capture. */
    MC_EVENT_WAIT_EXTERNAL = 1
} mcDrvEventWaitFlags;

/* Opaque handles: the pointed-to struct types are only declared here, never defined. */
typedef struct mcDrvTexref_st *mcDrvTexref;   /* texture reference handle */
typedef struct mcDrvSurfref_st *mcDrvSurfref; /* surface reference handle */

/* Texture and surface objects are unsigned long long values rather than pointers. */
typedef unsigned long long mcDrvTexObject;
typedef unsigned long long mcDrvSurfObject;

typedef struct mcDrvExtMemory_st *mcDrvExternalMemory; /* external memory handle */

/*
 * Driver-API ("mcDrv") aliases for types declared outside this section
 * (presumably in mc_runtime_types.h -- confirm). Each typedef only renames
 * the underlying type; no new layout is introduced here.
 */
typedef mcIpcEventHandle_t mcDrvIpcEventHandle_t;
typedef mcIpcMemHandle_t mcDrvIpcMemHandle_t;
typedef mcEvent_t mcDrvEvent_t;
typedef mcStream_t mcDrvStream_t;
/* Stream callback signature: (stream, status, opaque user pointer). */
typedef void (*mcDrvStreamCallback_t)(MCstream stream, mcDrvError_t status, void *userData);
typedef mcFunction_t mcDrvFunction_t;
typedef mcFunction_attribute mcDrvFunction_attribute;
typedef mcModule_t mcDrvModule_t;
typedef mcLinkState_t mcDrvlinkState_t;
typedef mcCtx_t mcDrvContext_t;
typedef mcMemPool_t mcDrvMemPool_t;
typedef mcMemGenericAllocationHandle mcDrvMemGenericAllocationHandle_t;
typedef struct mcMemAccessDesc mcDrvMemAccessDesc_t;
typedef struct mcMemLocation mcDrvMemLocation_t;
typedef enum mcMemAccessFlags mcDrvMemAccess_flags_t;
typedef enum mcMemAllocationHandleType mcDrvMemAllocationHandleType_t;
typedef mcMemAllocationProp mcDrvMemAllocationProp_t;
typedef mcMemAllocationGranularity_flags mcDrvMemAllocationGranularity_flags_t;
typedef mcStreamAttrID mcDrvStreamAttrID;
typedef union mcStreamAttrValue mcDrvStreamAttrValue;
typedef mcStreamBatchMemOpParams mcDrvStreamBatchMemOpParams;
typedef mcStreamWaitValue_flags mcDrvStreamWaitValue_flags;
typedef mcStreamWriteValue_flags mcDrvStreamWriteValue_flags;
typedef struct mcMemPoolPtrExportData mcDrvMemPoolPtrExportData_t;
typedef enum mcMemAllocationType mcDrvMemAllocationType_t;
typedef enum mcMemLocationType mcDrvMemLocationType_t;
typedef struct mcMemPoolProps mcDrvMemPoolProps_t;
typedef enum mcMemPoolAttr mcDrvMemPoolAttr_t;
typedef enum mcSharedCarveout mcDrvSharedCarveout_t;
typedef enum mcResourceType mcDrvResourceType_t;
typedef struct mcAccessPolicywindow mcDrvAccessPolicyWindow;
/* "mvDrv" is a historical misspelling of the "mcDrv" prefix; it is kept so
 * existing callers still compile. New code should use mcDrvUserObjectFlags. */
typedef enum mcUserObjectFlags mvDrvUserObjectFlags;
typedef enum mcUserObjectFlags mcDrvUserObjectFlags;
typedef enum mcUserObjectRetainFlags mcDrvUserObjectRetainFlags;
typedef enum mcSynchronizationPolicy mcDrvSynchronizationPolicy;
typedef mcGraphExecUpdateResult mcDrvGraphExecUpdateResult;
typedef mcBatchMemOpNodeParams mcDrvBatchMemOpNodeParams;

/**
 * Ipc Mem Flags
 */
typedef enum mcDrvIpcMemFlags_enum {
    /* Automatically enable peer access between remote devices as needed */
    MC_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 1
} mcDrvIpcMemFlags;

/**
 * Sparse subresource types
 */
typedef enum mcDrvArraySparseSubresourceType_enum {
    MC_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0x0,
    MC_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL      = 0x1
} mcDrvArraySparseSubresourceType;

/**
 * Memory operation types (numbering starts at 1; 0 is not a defined operation)
 */
typedef enum mcDrvMemOperationType_enum {
    MC_MEM_OPERATION_TYPE_MAP   = 0x1,
    MC_MEM_OPERATION_TYPE_UNMAP = 0x2
} mcDrvMemOperationType;

/**
 * Memory handle types (a single generic type is defined)
 */
typedef enum mcDrvMemHandleType_enum {
    MC_MEM_HANDLE_TYPE_GENERIC = 0x0
} mcDrvMemHandleType;

/**
 * Specifies compression attribute for an allocation.
 */
typedef enum mcDrvMemAllocationCompType_enum {
    /* Allocating non-compressible memory */
    MC_MEM_ALLOCATION_COMP_NONE = 0,
    /* Allocating compressible memory */
    MC_MEM_ALLOCATION_COMP_GENERIC = 1
} mcDrvMemAllocationCompType;

/**
 * When set, this flag indicates that the memory will be used as a tile pool.
 */
#define MC_MEM_CREATE_USAGE_TILE_POOL 0x1

/**
 * MACA Mem Attach Flags; each value mirrors the identically named
 * mcMemAttach* constant.
 */
typedef enum mcDrvMemAttachFlags_enum {
    /** Memory can be accessed by any stream on any device */
    MC_MEM_ATTACH_GLOBAL = mcMemAttachGlobal,
    /** Memory cannot be accessed by any stream on any device */
    MC_MEM_ATTACH_HOST = mcMemAttachHost,
    /** Memory can only be accessed by a single stream on the associated device */
    MC_MEM_ATTACH_SINGLE = mcMemAttachSingle
} mcDrvMemAttachFlags;

/* graph: opaque pointer handles; the struct bodies are not defined in this section */
typedef struct _mcArray *mcDrvArray_t;
typedef struct GraphNode *mcDrvGraphNode;

typedef struct mcDrvExtSemaphore_st *mcDrvExternalSemaphore;

typedef struct GraphExec *mcDrvGraphExec;
typedef struct UserObject *mcDrvUserObject;
typedef struct imcGraph *mcDrvGraph;

/**
 * MACA Graphics Interoperability
 *
 * mcDrvGraphicsResource_st is a placeholder kept so that CUDA-style code
 * compiles; it is not implemented yet.
 */
typedef struct _mcMipmappedArray_t *mcDrvMipmappedArray;        /**< MACA mipmapped array */
typedef struct mcDrvGraphicsResource_st *mcDrvGraphicsResource; /**< MACA graphics interop resource */

/**
 * Specifies the MACA array or MACA mipmapped array memory mapping information.
 *
 * The struct is laid out as tag/union pairs: resourceType precedes the
 * resource union, subresourceType precedes the subresource union and
 * memHandleType precedes the memHandle union.
 * NOTE(review): presumably each tag selects the active member of the union
 * that follows it -- confirm against the code that consumes this struct.
 */
typedef struct mcDrvArrayMapInfo_st {
    mcDrvResourceType_t resourceType; /**< Resource type */

    union {
        mcDrvMipmappedArray mipmap; /**< MACA mipmapped array handle */
        mcDrvArray_t array;         /**< MACA array handle */
    } resource;

    mcDrvArraySparseSubresourceType subresourceType; /**< Sparse subresource type */

    union {
        struct {
            unsigned int level; /**< For MACA mipmapped arrays must be a valid mipmap level. For
                                   MACA arrays must be zero */
            unsigned int layer; /**< For MACA layered arrays must be a valid layer index. Otherwise,
                                   must be zero */
            unsigned int offsetX;      /**< Starting X offset in elements */
            unsigned int offsetY;      /**< Starting Y offset in elements */
            unsigned int offsetZ;      /**< Starting Z offset in elements */
            unsigned int extentWidth;  /**< Width in elements */
            unsigned int extentHeight; /**< Height in elements */
            unsigned int extentDepth;  /**< Depth in elements */
        } sparseLevel;
        struct {
            unsigned int layer; /**< For MACA layered arrays must be a valid layer index. Otherwise,
                                   must be zero */
            unsigned long long offset; /**< Offset within mip tail */
            unsigned long long size;   /**< Extent in bytes */
        } miptail;
    } subresource;

    mcDrvMemOperationType memOperationType; /**< Memory operation type */
    mcDrvMemHandleType memHandleType;       /**< Memory handle type */

    union {
        mcDrvMemGenericAllocationHandle_t memHandle; /**< Generic allocation handle */
    } memHandle;

    unsigned long long offset;  /**< Offset within the memory */
    unsigned int deviceBitMask; /**< Device ordinal bit mask */
    unsigned int flags;         /**< flags for future use, must be zero now. */
    unsigned int reserved[2];   /**< Reserved for future use, must be zero now. */
} mcDrvArrayMapInfo;

/**
 * 3D array descriptor.
 *
 * Dimensions are size_t here; mcDrvArray3DDescriptorv1 is the variant with
 * unsigned int dimensions.
 */
typedef struct mcDrvArray3DDescriptor_st {
    size_t Width;  /**< Width of 3D array */
    size_t Height; /**< Height of 3D array */
    size_t Depth;  /**< Depth of 3D array */

    mcArray_Format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
    unsigned int Flags;       /**< Flags */
} mcDrvArray3DDescriptor;

/**
 * 3D array descriptor, v1 variant: same fields as mcDrvArray3DDescriptor but
 * with unsigned int (rather than size_t) dimensions.
 */
typedef struct mcDrvArray3DDescriptorv1_st
{
    unsigned int Width;         /**< Width of 3D array */
    unsigned int Height;        /**< Height of 3D array */
    unsigned int Depth;         /**< Depth of 3D array */

    mcArray_Format Format;      /**< Array format */
    unsigned int NumChannels;   /**< Channels per array element */
    unsigned int Flags;         /**< Flags */
} mcDrvArray3DDescriptorv1;

/**
 * External memory handle types (numbered 1..8)
 */
typedef enum mcDrvExternalMemoryHandleType_enum {
    /* Handle is an opaque file descriptor */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 0x1,
    /* Handle is an opaque shared NT handle */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 0x2,
    /* Handle is an opaque, globally shared handle */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 0x3,
    /* Handle is a D3D12 heap object */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 0x4,
    /* Handle is a D3D12 committed resource */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 0x5,
    /* Handle is a shared NT handle to a D3D11 resource */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 0x6,
    /* Handle is a globally shared handle to a D3D11 resource */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 0x7,
    /* Handle is an NvSciBuf object */
    MC_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 0x8
} mcDrvExternalMemoryHandleType;

/* Flag value for external memory handle descriptors. */
#define MC_EXTERNAL_MEMORY_DEDICATED 0x1

/* External semaphore signal / wait flag values. */
#define MC_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC 0x1
#define MC_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC   0x2

/* NvSciSync attribute flag values. */
#define MC_NVSCISYNC_ATTR_SIGNAL 0x1
#define MC_NVSCISYNC_ATTR_WAIT   0x2

/**
 * External memory handle descriptor.
 *
 * `type` is followed by the `handle` union, whose members are a file
 * descriptor, a Win32 handle/name pair and an NvSciBuf object pointer.
 */
typedef struct mcDrvExternalMemoryHandleDesc_st {
    mcDrvExternalMemoryHandleType type; /* Type of the handle */
    union {
        /**
         * File descriptor referencing the memory object. Valid
         * when type is ::MC_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD
         */
        int fd;

        struct {
            /**
             * Valid NT handle. Must be NULL if 'name' is non-NULL
             */
            void *handle;
            /**
             * Name of a valid memory object.
             * Must be NULL if 'handle' is non-NULL.
             */
            const void *name;
        } win32;

        /* NvSciBuf object pointer. NOTE(review): presumably valid when type is
         * ::MC_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF -- confirm. */
        const void *nvSciBufObject;
    } handle;

    /* Size of the memory allocation */
    unsigned long long size;
    /**
     * Flags must either be zero or 0x1
     */
    unsigned int flags;
    /* Reserved words. NOTE(review): presumably must be zero -- confirm. */
    unsigned int reserved[16];
} mcDrvExternalMemoryHandleDesc;

/**
 * External memory buffer descriptor
 */
typedef struct mcDrvExternalMemoryBufferDesc_st {
    /* Offset into the memory object where the buffer's base is */
    unsigned long long offset;
    /* Size of the buffer */
    unsigned long long size;
    /* Flags reserved for future use. Must be zero. */
    unsigned int flags;
    /* Reserved words */
    unsigned int reserved[16];
} mcDrvExternalMemoryBufferDesc;

/**
 * External memory mipmap descriptor: where the mipmap chain starts inside the
 * memory object, what its base level looks like and how many levels it has.
 */
typedef struct mcDrvExternalMemoryMipmappedArrayDesc_st {
    /* Offset into the memory object where the base level of the mipmap chain is. */
    unsigned long long offset;
    /* Format, dimension and type of base level of the mipmap chain */
    mcDrvArray3DDescriptor arrayDesc;
    /* Total number of levels in the mipmap chain */
    unsigned int numLevels;
    /* Reserved words */
    unsigned int reserved[16];
} mcDrvExternalMemoryMipmappedArrayDesc;

/**
 * External semaphore handle descriptor.
 *
 * NOTE(review): the public names are spelled "Semaphor" (missing the final
 * 'e'), unlike mcDrvExternalSemaphoreHandleType and the other semaphore types
 * in this header. The spelling is part of the interface, so it is left as is;
 * consider adding a correctly spelled alias.
 */
typedef struct mcDrvExternalSemaphorHandleDesc_st {
    mcDrvExternalSemaphoreHandleType type; /* Type of the handle */
    union {
        int fd; /* File descriptor referencing the semaphore object. */

        struct {
            /**
             * Valid NT handle. Must be NULL if 'name' is non-NULL
             */
            void *handle;
            /**
             * Name of a valid synchronization primitive.
             * Must be NULL if 'handle' is non-NULL.
             */
            const void *name;
        } win32; /* Win32 handle referencing the semaphore object. */
        /**
         * Valid NvSciSyncObj. Must be non NULL
         */
        const void *nvSciSyncObj;
    } handle;
    /**
     * Flags reserved for the future. Must be zero.
     */
    unsigned int flags;
    /* Reserved words. NOTE(review): presumably must be zero -- confirm. */
    unsigned int reserved[16];
} mcDrvExternalSemaphorHandleDesc;

/**
 * External semaphore signal parameters
 */
typedef struct mcDrvExternalSemaphoreSignalParams_st {
    struct {
        /** Parameters for fence objects */
        struct {
            unsigned long long value; /**< Value of fence to be signaled */
        } fence;

        /** NvSciSync parameters: a fence pointer overlaid with a 64-bit reserved word */
        union {
            void *fence;
            unsigned long long reserved;
        } nvSciSync;

        /** Parameters for keyed mutex objects */
        struct {
            unsigned long long key; /**< Value of key to release the mutex with */
        } keyedMutex;

        unsigned int reserved[12];
    } params;

    unsigned int flags;
    unsigned int reserved[16];
} mcDrvExternalSemaphoreSignalParams;

/**
 * Semaphore signal node parameters: two parallel arrays plus their shared
 * element count.
 */
typedef struct mcDrvExtSemSignalNodeParams_st {
    /** Array of external semaphore handles. */
    mcDrvExternalSemaphore *extSemArray;
    /** Array of external semaphore signal parameters. */
    const mcDrvExternalSemaphoreSignalParams *paramsArray;
    /** Number of handles and parameters supplied in extSemArray and paramsArray. */
    unsigned int numExtSems;
} mcDrvExtSemSignalNodeParams;

/* Graph memory attributes; values are written out explicitly (0..3). */
typedef enum mcDrvGraphMem_attribute_enum {
    /* Amount of memory, in bytes, currently associated with graphs */
    MC_GRAPH_MEM_ATTR_USED_MEM_CURRENT = 0,

    /* High watermark of memory, in bytes, associated with graphs since the
     * last time it was reset. High watermark can only be reset to zero. */
    MC_GRAPH_MEM_ATTR_USED_MEM_HIGH = 1,

    /* Amount of memory, in bytes, currently allocated for use by the MC graphs
     * asynchronous allocator. */
    MC_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = 2,

    /* High watermark of memory, in bytes, currently allocated for use by the
     * MC graphs asynchronous allocator. */
    MC_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH = 3
} mcDrvGraphMem_attribute;

/**
 * External semaphore wait parameters
 */
typedef struct mcDrvExternalSemaphoreWaitParams_st {
    struct {
        /** Parameters for fence objects */
        struct {
            unsigned long long value; /**< Value of fence to be waited on */
        } fence;

        /** NvSciSync parameters: a fence pointer overlaid with a 64-bit reserved word */
        union {
            void *fence;
            unsigned long long reserved;
        } nvSciSync;

        /** Parameters for keyed mutex objects */
        struct {
            unsigned long long key; /**< Value of key to acquire the mutex with */
            unsigned int timeoutMs; /**< Timeout in milliseconds to wait to acquire the mutex */
        } keyedMutex;

        unsigned int reserved[10];
    } params;

    unsigned int flags;
    unsigned int reserved[16];
} mcDrvExternalSemaphoreWaitParams;

/**
 * Semaphore wait node parameters: two parallel arrays plus their shared
 * element count.
 */
typedef struct mcDrvExtSemWaitNodeParams_st {
    mcDrvExternalSemaphore *extSemArray;                 /**< Array of external semaphore handles. */
    const mcDrvExternalSemaphoreWaitParams* paramsArray; /**< Array of external semaphore wait parameters. */
    unsigned int numExtSems;                             /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
} mcDrvExtSemWaitNodeParams;

/**
 * Host node parameters: a host callback and the opaque pointer handed to it.
 */
typedef struct mcDrvHostNodeParams_st {
    mcHostFn_t fn;  /**< The function to call when the node executes */
    void *userData; /**< Argument to pass to the function */
} mcDrvHostNodeParams;

/**
 * GPU kernel node parameters: the kernel handle, its grid and block
 * dimensions, the dynamic shared-memory size and the argument arrays.
 */
typedef struct mcDrvKernelNodeParams_st {
    mcDrvFunction_t func;        /**< Kernel to launch */
    unsigned int gridDimX;       /**< Width of grid in blocks */
    unsigned int gridDimY;       /**< Height of grid in blocks */
    unsigned int gridDimZ;       /**< Depth of grid in blocks */
    unsigned int blockDimX;      /**< X dimension of each thread block */
    unsigned int blockDimY;      /**< Y dimension of each thread block */
    unsigned int blockDimZ;      /**< Z dimension of each thread block */
    unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
    void **kernelParams;         /**< Array of pointers to kernel parameters */
    void **extra;                /**< Extra options */
} mcDrvKernelNodeParams;

/**
 * Memory allocation node parameters.
 *
 * Fields are marked "in" (supplied by the caller) or "out" (filled in by
 * MACA); dptr is the only "out" field.
 */
typedef struct mcDrvMemAllocNodeParams_st {
    /**
     * in: location where the allocation should reside (specified in ::location).
     * ::handleTypes must be ::mcMemHandleTypeNone. IPC is not supported.
     */
    mcDrvMemPoolProps_t poolProps;
    const mcDrvMemAccessDesc_t *accessDescs; /**< in: array of memory access descriptors.
                                              * Used to describe peer GPU access */
    size_t accessDescCount;                  /**< in: number of memory access descriptors.
                                              * Must not exceed the number of GPUs. */
    size_t bytesize;                         /**< in: size in bytes of the requested allocation */
    mcDrvDeviceptr_t dptr;                   /**< out: address of the allocation returned by MACA */
} mcDrvMemAllocNodeParams;

/**
 * Indicates that the layered sparse MACA array or MACA mipmapped array has a
 * single mip tail region for all layers.
 */
#define MC_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL 0x1

/**
 * MACA array sparse properties
 */
typedef struct mcDrvArraySparseProperties_st {
    /** Sparse tile dimensions, in elements */
    struct {
        unsigned int width;  /**< Width of sparse tile in elements */
        unsigned int height; /**< Height of sparse tile in elements */
        unsigned int depth;  /**< Depth of sparse tile in elements */
    } tileExtent;

    unsigned int miptailFirstLevel; /**< First mip level at which the mip tail begins. */
    unsigned long long miptailSize; /**< Total size of the mip tail. */
    unsigned int flags;             /**< Flags will either be zero or 0x1 */
    unsigned int reserved[4];
} mcDrvArraySparseProperties;

/**
 * MACA array memory requirements
 */
typedef struct mcDrvArrayMemoryRequirements_st {
    /** Total required memory size */
    size_t size;
    /** alignment requirement */
    size_t alignment;
    /** Reserved words */
    unsigned int reserved[4];
} mcDrvArrayMemoryRequirements;

/**
 * Memset node parameters: destination pointer and pitch, the fill value and
 * its element size, and the extent as width (in elements) by height (in rows).
 */
typedef struct mcDrvMemsetNodeParams_st {
    mcDrvDeviceptr_t dst;     /**< Destination device pointer */
    size_t pitch;             /**< Pitch of destination device pointer. Unused if height is 1 */
    unsigned int value;       /**< Value to be set */
    unsigned int elementSize; /**< Size of each element in bytes. Must be 1, 2, or 4. */
    size_t width;             /**< Width of the row in elements */
    size_t height;            /**< Number of rows */
} mcDrvMemsetNodeParams;

/* Graph kernel node attribute identifiers. The numbering is sparse (1, 2, 8). */
typedef enum mcDrvkernelNodeAttrID_enum {
    MC_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 0x1,
    MC_KERNEL_NODE_ATTRIBUTE_COOPERATIVE          = 0x2,
    MC_KERNEL_NODE_ATTRIBUTE_PRIORITY             = 0x8
} mcDrvkernelNodeAttrID;

/**
 * Graph kernel node attributes union, used with
 * ::wcuKernelNodeSetAttribute/::wcuKernelNodeGetAttribute
 *
 * There is one member per identifier in mcDrvkernelNodeAttrID. `priority` was
 * missing although ::MC_KERNEL_NODE_ATTRIBUTE_PRIORITY is defined; adding it
 * does not change the union's size or alignment because an int member
 * (`cooperative`) already exists.
 */
typedef union mcDrvKernelNodeAttrValue_st {
    mcDrvAccessPolicyWindow accessPolicyWindow; /* Value for ::MC_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW. */
    int cooperative;                            /* Nonzero indicates a cooperative kernel
                                                 * (see ::wcuLaunchCooperativeKernel). */
    int priority;                               /* Value for ::MC_KERNEL_NODE_ATTRIBUTE_PRIORITY. */
} mcDrvKernelNodeAttrValue;

/* Detailed outcome of a graph exec update: the offending edge/node (if any)
 * and a result code. */
typedef struct mcDrvGraphExecUpdateResultInfo_st {
    /* The from node of error edge when the topologies do not match. Otherwise NULL. */
    mcDrvGraphNode errorFromNode;
    /* The "to node" of the error edge when the topologies do not match. The error node when the
    error is associated with a specific node. NULL when the error is generic */
    mcDrvGraphNode errorNode;
    /* Gives more specific detail when a maca graph update fails. */
    mcDrvGraphExecUpdateResult result;
} mcDrvGraphExecUpdateResultInfo;

/*
 * Graph instantiation parameters. The "_out" suffix marks the fields that
 * carry results back to the caller.
 *
 * NOTE(review): CUDA's CUDA_GRAPH_INSTANTIATE_PARAMS declares hUploadStream
 * before hErrNode_out; here the two pointer members are in the opposite order.
 * If this struct is ever cast to or copied from a CUDA-ordered counterpart the
 * two fields would be swapped -- confirm that the order here is intentional.
 */
typedef struct mcDrvGraphInstantiateParams_st {
    mcuint64_t flags;                    /* Instantiation flags */
    mcDrvGraphNode hErrNode_out;         /* out: node associated with an error, if any (presumably NULL otherwise -- confirm) */
    mcDrvStream_t hUploadStream;         /* Stream handle used for upload */
    mcGraphInstantiateResult result_out; /* out: instantiation result code */
} mcDrvGraphInstantiateParams;

/* Graph node types, numbered consecutively from 0 to 12. */
typedef enum mcDrvGraphNodeType_enum {
    /** GPU kernel node */
    MC_GRAPH_NODE_TYPE_KERNEL = 0x0,
    /** Memcpy node */
    MC_GRAPH_NODE_TYPE_MEMCPY = 0x1,
    /** Memset node */
    MC_GRAPH_NODE_TYPE_MEMSET = 0x2,
    /** Host (executable) node */
    MC_GRAPH_NODE_TYPE_HOST = 0x3,
    /** Node which executes an embedded graph */
    MC_GRAPH_NODE_TYPE_GRAPH = 0x4,
    /** Empty (no-op) node */
    MC_GRAPH_NODE_TYPE_EMPTY = 0x5,
    /** External event wait node */
    MC_GRAPH_NODE_TYPE_WAIT_EVENT = 0x6,
    /** External event record node */
    MC_GRAPH_NODE_TYPE_EVENT_RECORD = 0x7,
    /** External semaphore signal node */
    MC_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 0x8,
    /** External semaphore wait node */
    MC_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 0x9,
    /** Memory Allocation Node */
    MC_GRAPH_NODE_TYPE_MEM_ALLOC = 0xa,
    /** Memory Free Node */
    MC_GRAPH_NODE_TYPE_MEM_FREE = 0xb,
    /** Batch MemOp Node */
    MC_GRAPH_NODE_TYPE_BATCH_MEM_OP = 0xc
} mcDrvGraphNodeType;

/**
 * Flags to register a graphics resource (bit flags)
 */
typedef enum mcDrvGraphicsRegisterFlags_enum {
    MC_GRAPHICS_REGISTER_FLAGS_NONE           = 0,
    MC_GRAPHICS_REGISTER_FLAGS_READ_ONLY      = 1 << 0,
    MC_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD  = 1 << 1,
    MC_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST   = 1 << 2,
    MC_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 1 << 3
} mcDrvGraphicsRegisterFlags;

/**
 * Flags for mapping and unmapping interop resources
 */
typedef enum mcDrvGraphicsMapResourceFlags_enum {
    MC_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0,
    MC_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 1,
    MC_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2
} mcDrvGraphicsMapResourceFlags;

/**
 * Array indices for cube faces (0..5, ordered +X, -X, +Y, -Y, +Z, -Z)
 */
typedef enum mcDrvArrayCubemapFace_enum {
    /** Positive X face of cubemap */
    MC_CUBEMAP_FACE_POSITIVE_X = 0,
    /** Negative X face of cubemap */
    MC_CUBEMAP_FACE_NEGATIVE_X = 1,
    /** Positive Y face of cubemap */
    MC_CUBEMAP_FACE_POSITIVE_Y = 2,
    /** Negative Y face of cubemap */
    MC_CUBEMAP_FACE_NEGATIVE_Y = 3,
    /** Positive Z face of cubemap */
    MC_CUBEMAP_FACE_POSITIVE_Z = 4,
    /** Negative Z face of cubemap */
    MC_CUBEMAP_FACE_NEGATIVE_Z = 5
} mcDrvArrayCubemapFace;

/* Texture: the mcDrvTexref handle type is already declared earlier in this
 * header together with mcDrvSurfref, so it is not repeated here (redeclaring a
 * typedef is a constraint violation before C11). */

/**
 * 2D memory copy parameters
 *
 * The mcDrv names are macro aliases for the MACA_MEMCPY2D struct tag and type,
 * which are not declared in this section.
 */
#define mcDrvMemcpy2D_st MACA_MEMCPY2D_st
#define mcDrvMemcpy2D    MACA_MEMCPY2D

/**
 * 2D memory copy parameters, v1 variant: offsets, pitches and extents are
 * unsigned int and device pointers use mcDrvDeviceptrv1_t.
 *
 * The source and destination halves have the same shape: a memory type
 * followed by one field per kind of endpoint (host, device, array).
 */
typedef struct mcDrvMemcpy2Dv1_st {
    unsigned int srcXInBytes;     /**< Source X in bytes */
    unsigned int srcY;            /**< Source Y */
    mcMemoryType srcMemoryType;   /**< Source memory type (host, device, array) */
    const void *srcHost;          /**< Source host pointer */
    mcDrvDeviceptrv1_t srcDevice; /**< Source device pointer */
    mcDrvArray_t srcArray;        /**< Source array reference */
    unsigned int srcPitch;        /**< Source pitch (ignored when src is array) */

    unsigned int dstXInBytes;     /**< Destination X in bytes */
    unsigned int dstY;            /**< Destination Y */
    mcMemoryType dstMemoryType;   /**< Destination memory type (host, device, array) */
    void *dstHost;                /**< Destination host pointer */
    mcDrvDeviceptrv1_t dstDevice; /**< Destination device pointer */
    mcDrvArray_t dstArray;        /**< Destination array reference */
    unsigned int dstPitch;        /**< Destination pitch (ignored when dst is array) */

    unsigned int WidthInBytes; /**< Width of 2D memory copy in bytes */
    unsigned int Height;       /**< Height of 2D memory copy */
} mcDrvMemcpy2Dv1;

/**
 * 3D memory copy parameters
 *
 * The mcDrv names are macro aliases for the MACA_MEMCPY3D struct tag and type,
 * which are not declared in this section.
 */
#define mcDrvMemcpy3D_st MACA_MEMCPY3D_st
#define mcDrvMemcpy3D    MACA_MEMCPY3D

typedef struct mcDrvMemcpy3Dv1_st {
    unsigned int srcXInBytes;     /**< Source X in bytes */
    unsigned int srcY;            /**< Source Y */
    unsigned int srcZ;            /**< Source Z */
    unsigned int srcLOD;          /**< Source LOD */
    mcMemoryType srcMemoryType;   /**< Source memory type (host, device, array) */
    const void *srcHost;          /**< Source host pointer */
    mcDrvDeviceptrv1_t srcDevice; /**< Source device pointer */
    mcDrvArray_t srcArray;        /**< Source array reference */
    void *reserved0;              /**< Must be NULL */
    unsigned int srcPitch;        /**< Source pitch (ignored when src is array) */
    unsigned int srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */

    unsigned int dstXInBytes;     /**< Destination X in bytes */
    unsigned int dstY;            /**< Destination Y */
    unsigned int dstZ;            /**< Destination Z */
    unsigned int dstLOD;          /**< Destination LOD */
    mcMemoryType dstMemoryType;   /**< Destination memory type (host, device, array) */
    void *dstHost;                /**< Destination host pointer */
    mcDrvDeviceptrv1_t dstDevice; /**< Destination device pointer */
    mcDrvArray_t dstArray;        /**< Destination array reference */
    void *reserved1;              /**< Must be NULL */
    unsigned int dstPitch;        /**< Destination pitch (ignored when dst is array) */
    unsigned int
        dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */

    unsigned int WidthInBytes; /**< Width of 3D memory copy in bytes */
    unsigned int Height;       /**< Height of 3D memory copy */
    unsigned int Depth;        /**< Depth of 3D memory copy */
} mcDrvMemcpy3Dv1;

/**
 * 3D memory cross-context copy parameters
 *
 * Offsets, pitches and extents are `size_t`. In addition to the usual
 * host/device/array locations, the source and the destination each carry a
 * ::mcDrvContext_t naming the context they belong to.
 */
typedef struct mcDrvMemcpy3DPeer_st {
    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    size_t srcZ;                /**< Source Z */
    size_t srcLOD;              /**< Source LOD */
    mcMemoryType srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    mcDrvDeviceptr_t srcDevice; /**< Source device pointer */
    mcDrvArray_t srcArray;      /**< Source array reference */
    mcDrvContext_t
        srcContext;   /**< Source context (ignored when srcMemoryType is ::mcMemoryTypeArray) */
    size_t srcPitch;  /**< Source pitch (ignored when src is array) */
    size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */

    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    size_t dstZ;                /**< Destination Z */
    size_t dstLOD;              /**< Destination LOD */
    mcMemoryType dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    mcDrvDeviceptr_t dstDevice; /**< Destination device pointer */
    mcDrvArray_t dstArray;      /**< Destination array reference */
    mcDrvContext_t
        dstContext;  /**< Destination context (ignored when dstMemoryType is ::mcMemoryTypeArray) */
    size_t dstPitch; /**< Destination pitch (ignored when dst is array) */
    size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */

    size_t WidthInBytes; /**< Width of 3D memory copy in bytes */
    size_t Height;       /**< Height of 3D memory copy */
    size_t Depth;        /**< Depth of 3D memory copy */
} mcDrvMemcpy3DPeer;

/**
 * Array descriptor
 *
 * Describes an array by its extents (stored as `size_t`), its element format
 * and the number of channels per element.
 */
typedef struct mcDrvArrayDescriptor_st {
    size_t Width;  /**< Width of array */
    size_t Height; /**< Height of array */

    mcArray_Format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
} mcDrvArrayDescriptor;

/**
 * Array descriptor, "v1" flavour: the same four members as
 * ::mcDrvArrayDescriptor, except that Width and Height are `unsigned int`
 * rather than `size_t`.
 */
typedef struct mcDrvArrayDescriptorv1_st {
    unsigned int Width;  /**< Width of array */
    unsigned int Height; /**< Height of array */

    mcArray_Format Format;    /**< Array format */
    unsigned int NumChannels; /**< Channels per array element */
} mcDrvArrayDescriptorv1;

/**
 * Occupancy calculator flags.
 *
 * Each enumerator takes its value from the like-named mcOccupancy* constant.
 */
typedef enum mcDrvOccupancyFlagsEnum {
    /** Default behavior */
    MC_OCCUPANCY_DEFAULT = mcOccupancyDefault,
    /** Assume global caching is enabled and cannot be automatically turned off */
    MC_OCCUPANCY_DISABLE_CACHING_OVERRIDE = mcOccupancyDisableCachingOverride
} mcDrvOccupancyFlags;

/**
 * Addressing modes for texture references.
 *
 * Values are consecutive, 0 through 3.
 */
typedef enum mcDrvAddressMode_enum {
    /** Wrapping address mode */
    MC_TR_ADDRESS_MODE_WRAP = 0,
    /** Clamp to edge address mode */
    MC_TR_ADDRESS_MODE_CLAMP = 1,
    /** Mirror address mode */
    MC_TR_ADDRESS_MODE_MIRROR = 2,
    /** Border address mode */
    MC_TR_ADDRESS_MODE_BORDER = 3
} mcDrvAddress_mode;

/**
 * Filtering modes for texture references.
 */
typedef enum mcDrvFilterMode_enum {
    /** Point filter mode */
    MC_TR_FILTER_MODE_POINT = 0,
    /** Linear filter mode */
    MC_TR_FILTER_MODE_LINEAR = 1
} mcDrvFilter_mode;

/* Texture Object */

/**
 * Resource types.
 *
 * Values are consecutive, 0 through 3.
 */
typedef enum mcDrvResourcetype_enum {
    /** Array resource */
    MC_RESOURCE_TYPE_ARRAY = 0,
    /** Mipmapped array resource */
    MC_RESOURCE_TYPE_MIPMAPPED_ARRAY = 1,
    /** Linear resource */
    MC_RESOURCE_TYPE_LINEAR = 2,
    /** Pitch 2D resource */
    MC_RESOURCE_TYPE_PITCH2D = 3
} mcDrvResourcetype;

/**
 * MACA Resource descriptor
 *
 * A resource type tag followed by a union holding one parameter set per kind
 * of resource (array, mipmapped array, linear memory, pitched 2D memory).
 * NOTE(review): presumably `resType` tells which member of `res` is
 * meaningful - confirm against the consumers of this struct.
 */
typedef struct mcDrvResourceDesc_st {
    mcDrvResourcetype resType; /**< Resource type */

    union {
        struct {
            MCarray hArray; /**< MACA array */
        } array;
        struct {
            mcDrvMipmappedArray hMipmappedArray; /**< MACA mipmapped array */
        } mipmap;
        struct {
            mcDrvDeviceptr_t devPtr;  /**< Device pointer */
            mcArray_Format format;    /**< Array format */
            unsigned int numChannels; /**< Channels per array element */
            size_t sizeInBytes;       /**< Size in bytes */
        } linear;
        struct {
            mcDrvDeviceptr_t devPtr;  /**< Device pointer */
            mcArray_Format format;    /**< Array format */
            unsigned int numChannels; /**< Channels per array element */
            size_t width;             /**< Width of the array in elements */
            size_t height;            /**< Height of the array in elements */
            size_t pitchInBytes;      /**< Pitch between two rows in bytes */
        } pitch2D;
        /* Keeps the union at least as large as 32 ints, whatever the other members need. */
        struct {
            int reserved[32];
        } reserved;
    } res;

    unsigned int flags; /**< Flags (must be zero) */
} mcDrvResourceDesc;

/**
 * Texture descriptor
 *
 * Collects the sampling state of a texture: addressing and filtering modes,
 * anisotropy, mipmap parameters and the border color.
 */
typedef struct mcDrvTextureDesc_st {
    mcDrvAddress_mode addressMode[3];  /**< Address modes */
    mcDrvFilter_mode filterMode;       /**< Filter mode */
    unsigned int flags;                /**< Flags */
    unsigned int maxAnisotropy;        /**< Maximum anisotropy ratio */
    mcDrvFilter_mode mipmapFilterMode; /**< Mipmap filter mode */
    float mipmapLevelBias;             /**< Mipmap level bias */
    float minMipmapLevelClamp;         /**< Mipmap minimum level clamp */
    float maxMipmapLevelClamp;         /**< Mipmap maximum level clamp */
    float borderColor[4];              /**< Border Color */
    int reserved[12];                  /**< Reserved */
} mcDrvTextureDesc;

/**
 * Resource view formats
 *
 * The enumerators are numbered contiguously from 0 (NONE) to 34 (UNSIGNED_BC7).
 */
typedef enum mcDrvResourceViewFormat_enum {
    /** No resource view format (use underlying resource format) */
    MC_RES_VIEW_FORMAT_NONE = 0,

    /* 8-bit integer formats */
    MC_RES_VIEW_FORMAT_UINT_1X8 = 1, /**< 1 channel unsigned 8-bit integers */
    MC_RES_VIEW_FORMAT_UINT_2X8 = 2, /**< 2 channel unsigned 8-bit integers */
    MC_RES_VIEW_FORMAT_UINT_4X8 = 3, /**< 4 channel unsigned 8-bit integers */
    MC_RES_VIEW_FORMAT_SINT_1X8 = 4, /**< 1 channel signed 8-bit integers */
    MC_RES_VIEW_FORMAT_SINT_2X8 = 5, /**< 2 channel signed 8-bit integers */
    MC_RES_VIEW_FORMAT_SINT_4X8 = 6, /**< 4 channel signed 8-bit integers */

    /* 16-bit integer formats */
    MC_RES_VIEW_FORMAT_UINT_1X16 = 7,  /**< 1 channel unsigned 16-bit integers */
    MC_RES_VIEW_FORMAT_UINT_2X16 = 8,  /**< 2 channel unsigned 16-bit integers */
    MC_RES_VIEW_FORMAT_UINT_4X16 = 9,  /**< 4 channel unsigned 16-bit integers */
    MC_RES_VIEW_FORMAT_SINT_1X16 = 10, /**< 1 channel signed 16-bit integers */
    MC_RES_VIEW_FORMAT_SINT_2X16 = 11, /**< 2 channel signed 16-bit integers */
    MC_RES_VIEW_FORMAT_SINT_4X16 = 12, /**< 4 channel signed 16-bit integers */

    /* 32-bit integer formats */
    MC_RES_VIEW_FORMAT_UINT_1X32 = 13, /**< 1 channel unsigned 32-bit integers */
    MC_RES_VIEW_FORMAT_UINT_2X32 = 14, /**< 2 channel unsigned 32-bit integers */
    MC_RES_VIEW_FORMAT_UINT_4X32 = 15, /**< 4 channel unsigned 32-bit integers */
    MC_RES_VIEW_FORMAT_SINT_1X32 = 16, /**< 1 channel signed 32-bit integers */
    MC_RES_VIEW_FORMAT_SINT_2X32 = 17, /**< 2 channel signed 32-bit integers */
    MC_RES_VIEW_FORMAT_SINT_4X32 = 18, /**< 4 channel signed 32-bit integers */

    /* Floating-point formats */
    MC_RES_VIEW_FORMAT_FLOAT_1X16 = 19, /**< 1 channel 16-bit floating point */
    MC_RES_VIEW_FORMAT_FLOAT_2X16 = 20, /**< 2 channel 16-bit floating point */
    MC_RES_VIEW_FORMAT_FLOAT_4X16 = 21, /**< 4 channel 16-bit floating point */
    MC_RES_VIEW_FORMAT_FLOAT_1X32 = 22, /**< 1 channel 32-bit floating point */
    MC_RES_VIEW_FORMAT_FLOAT_2X32 = 23, /**< 2 channel 32-bit floating point */
    MC_RES_VIEW_FORMAT_FLOAT_4X32 = 24, /**< 4 channel 32-bit floating point */

    /* Block-compressed formats */
    MC_RES_VIEW_FORMAT_UNSIGNED_BC1  = 25, /**< Block compressed 1 */
    MC_RES_VIEW_FORMAT_UNSIGNED_BC2  = 26, /**< Block compressed 2 */
    MC_RES_VIEW_FORMAT_UNSIGNED_BC3  = 27, /**< Block compressed 3 */
    MC_RES_VIEW_FORMAT_UNSIGNED_BC4  = 28, /**< Block compressed 4 unsigned */
    MC_RES_VIEW_FORMAT_SIGNED_BC4    = 29, /**< Block compressed 4 signed */
    MC_RES_VIEW_FORMAT_UNSIGNED_BC5  = 30, /**< Block compressed 5 unsigned */
    MC_RES_VIEW_FORMAT_SIGNED_BC5    = 31, /**< Block compressed 5 signed */
    MC_RES_VIEW_FORMAT_UNSIGNED_BC6H = 32, /**< Block compressed 6 unsigned half-float */
    MC_RES_VIEW_FORMAT_SIGNED_BC6H   = 33, /**< Block compressed 6 signed half-float */
    MC_RES_VIEW_FORMAT_UNSIGNED_BC7  = 34  /**< Block compressed 7 */
} mcDrvResourceViewFormat;

/**
 * Resource view descriptor
 *
 * Names the view format, the view's extents, and the inclusive-looking
 * first/last ranges of mipmap levels and layers it covers.
 * NOTE(review): whether the "last" indices are inclusive is not stated in
 * this header - confirm.
 */
typedef struct mcDrvResourceViewDesc_st {
    mcDrvResourceViewFormat format; /**< Resource view format */
    size_t width;                   /**< Width of the resource view */
    size_t height;                  /**< Height of the resource view */
    size_t depth;                   /**< Depth of the resource view */
    unsigned int firstMipmapLevel;  /**< First defined mipmap level */
    unsigned int lastMipmapLevel;   /**< Last defined mipmap level */
    unsigned int firstLayer;        /**< First layer index */
    unsigned int lastLayer;         /**< Last layer index */
    unsigned int reserved[16];      /**< Reserved */
} mcDrvResourceViewDesc;

/**
 * GPU Direct v3 tokens
 *
 * A pair of tokens: one `unsigned long long` and one `unsigned int`.
 */
struct mcDrvPointerAttributeP2PTokens_st {
    unsigned long long p2pToken; /**< P2P token */
    unsigned int vaSpaceToken;   /**< VA space token */
};
typedef struct mcDrvPointerAttributeP2PTokens_st mcDrvPointerAttributeP2PTokens;

/**
 * Access flags that specify the level of access the current context's device has
 * on the memory referenced.
 */
typedef enum mcDrvPointerAttributeAccessFlags_enum {
    /**
     * No access, meaning the device cannot access this memory at all, thus must be
     * staged through accessible memory in order to complete certain operations
     */
    MC_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0x0,

    /**
     * Read-only access, meaning writes to this memory are considered invalid accesses
     * and thus return error in that case.
     */
    MC_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 0x1,

    /** Read-write access, the device has full read-write access to the memory */
    MC_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 0x3
} mcDrvPointerAttributeAccessFlags;

/**
 * Kernel launch parameters
 *
 * One record describes a single kernel launch: the function, the grid and
 * block dimensions, the dynamic shared-memory size, the stream and the
 * kernel argument pointers.
 */
typedef struct mcDrvLaunchParams_st {
    mcDrvFunction_t function;    /**< Kernel to launch */
    unsigned int gridDimX;       /**< Width of grid in blocks */
    unsigned int gridDimY;       /**< Height of grid in blocks */
    unsigned int gridDimZ;       /**< Depth of grid in blocks */
    unsigned int blockDimX;      /**< X dimension of each thread block */
    unsigned int blockDimY;      /**< Y dimension of each thread block */
    unsigned int blockDimZ;      /**< Z dimension of each thread block */
    unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
    mcDrvStream_t hStream;       /**< Stream identifier */
    void **kernelParams;         /**< Array of pointers to kernel parameters */
} mcDrvLaunchParams_v1;
/** Unversioned name; currently an alias of the _v1 layout. */
typedef mcDrvLaunchParams_v1 mcDrvLaunchParams;

/* Surface */

/** An opaque value that represents a MACA surface object */
typedef unsigned long long mcDrvSurfObject;

/* Tag declaration only; no members of the record are declared at this point. */
struct mcDrvSurfref_st;
/** Surface reference handle: a pointer to ::mcDrvSurfref_st. */
typedef struct mcDrvSurfref_st *mcDrvSurfref;

/**
 * MACA Occupancy type and flags.
 */

#include "cuda_driver_wrapper_typedef.h"
#endif
